FastQC

Raw

tb_fq <- tb_samps %>%
  filter(file.exists(fq_raw1) & file.exists(fq_raw2)) %>%
  select(name,cell_line,epitope,rep,starts_with("fq_raw"))

fq_raw1 <- clugPac::read_fastqc(tb_fq$fq_raw1)
fq_raw2 <- clugPac::read_fastqc(tb_fq$fq_raw2)

fq_raw  <- lapply(names(fq_raw1), function(nm){
  tb1   <- fq_raw1[[nm]]
  tb2   <- fq_raw2[[nm]]
  
  if(!is.null(tb1)){
    tb1 <- mutate(tb1,read="R1")
  }
  if(!is.null(tb2)){
    tb2 <- mutate(tb2,read="R2")
  }
  if(!is.null(tb1) & !is.null(tb2)){
    tb <- rbind(tb1,tb2)
  }else if(!is.null(tb1)){
    tb <- tb1
  }else if(is.null(tb2)){
    tb <- tb2
  }else{
    tb <- NULL
  }
  return(as_tibble(tb))
}) %>% setNames(names(fq_raw1))

Flag summary

if(is.null(fq_raw$summary)){
  p <- ggplot() + theme_void()
}else{
  tb_p  <- fq_raw$summary %>%
    mutate(name = gsub("_val_[12]$","",name)) %>%
    separate(name,into=c("cell_line","epitope","cond","rep"),sep = "_",remove = FALSE) %>%
    select(-cond) %>%
    mutate(cell_line = factor(cell_line),
           epitope = factor(epitope,levels=c("KLF5","IgG")),
           read = factor(read),
           rep = factor(rep),
           flag = factor(flag,levels=c("pass","warn","fail")),
           title = factor(title),
           idx = row_number()) %>%
    rowwise %>%
    mutate(x = ifelse(read == "R1",list(c(0,1,0)),list(c(0,1,1))),
           y = ifelse(read == "R1",list(c(1,1,0)),list(c(0,1,0)))) %>%
    ungroup %>%
    unnest(c(x,y)) %>%
    mutate(x = x + (as.integer(rep)-1),
           y = y + (as.integer(title)-1))
  
  y_labs  <- levels(tb_p$title)
  y_labs  <- setNames(c(1:length(y_labs))-0.5,y_labs)
  x_labs  <- levels(tb_p$rep)
  x_labs  <- setNames(c(1:length(x_labs))-0.5,x_labs)
  p <- ggplot(tb_p,aes(x=x,y=y,group=idx,fill=flag)) +
    facet_wrap(.~cell_line,
               nrow=1,
               scales = "free_x") +
    scale_x_continuous(name = "Replicate",
                       expand=c(0,0),
                       breaks=x_labs,
                       labels = names(x_labs)) +
    scale_y_reverse(expand=c(0,0),
                       breaks = y_labs,
                       labels = names(y_labs))+
    scale_fill_manual(values=c(pass="lightgreen",
                               warn="orange",
                               fail="firebrick")) +
    geom_polygon(color="black") +
    annotate(geom = "text",x=0.05,y=0.95,hjust=0,vjust=0,label="R1",size=2.5) +
    annotate(geom = "text",x=0.95,y=0.05,hjust=1,vjust=1,label="R2",size=2.5) +
    ggtitle("Raw reads") +
    theme_minimal() +
    theme(axis.title.y = element_blank(),
          legend.title = element_blank(),
          plot.background= element_rect(fill="white",color=NA))
}
plot(p)

FastQ Reads

tb_stats <- fq_raw$basic_statistics %>%
  filter(read == "R1") %>%
  mutate(name = gsub("_val_1","",name),
         measure = gsub(" ","_",gsub("%","pct_",tolower(measure)))) %>%
  pivot_wider(id_cols = name,
              names_from = measure,
              values_from = value) %>%
  select(name,total_sequences,total_bases,sequence_length,pct_gc) %>%
  mutate(total_sequences = as.double(total_sequences)) %>%
  left_join(by="name",
    select(tb_samps,name,cell_line,epitope,rep)) %>%
  arrange(cell_line,epitope) %>%
  mutate(name = factor(name,levels=select(.,name) %>% unlist %>% unique)) %>%
  select(cell_line,epitope,rep,everything())

tb_stats %>%
  mutate(total_sequences = prettyNumbers(total_sequences)) %>%
  kable %>%
  kable_styling(full_width=FALSE)
cell_line epitope rep name total_sequences total_bases sequence_length pct_gc
OS774 KLF5 1 OS774_KLF5_NONE_1 126.17M 6.3 Gbp 20-51 44
OS774 KLF5 2 OS774_KLF5_NONE_2 150.21M 7.5 Gbp 20-51 43
OS774 KLF5 3 OS774_KLF5_NONE_3 110.73M 5.5 Gbp 20-51 44
OS774 IgG 1 OS774_IgG_NONE_1 199.65M 10 Gbp 20-51 42
OS774 IgG 2 OS774_IgG_NONE_2 203.99M 10.2 Gbp 20-51 41
OS774 IgG 3 OS774_IgG_NONE_3 125.4M 6.2 Gbp 20-51 43
OS833 KLF5 1 OS833_KLF5_NONE_1 65.95M 3.3 Gbp 20-51 42
OS833 KLF5 2 OS833_KLF5_NONE_2 92.4M 4.6 Gbp 20-51 42
OS833 KLF5 3 OS833_KLF5_NONE_3 76.19M 3.8 Gbp 20-51 42
OS833 IgG 1 OS833_IgG_NONE_1 116.2M 5.7 Gbp 20-51 43
OS833 IgG 2 OS833_IgG_NONE_2 141.05M 7 Gbp 20-51 42
OS833 IgG 3 OS833_IgG_NONE_3 105.41M 5.2 Gbp 20-51 42
ggplot(tb_stats,aes(x=name,y=total_sequences,fill=epitope,label=paste0(prettyNumbers(total_sequences,digs = 1),"\n"))) +
  facet_wrap(.~cell_line,scales="free_x",nrow=1) +
  scale_y_continuous(labels = prettyNumbers,name="Reads",expand=expansion(mult=c(0,0.1))) +
  #scale_fill_manual(values=c(KLF5="lightblue",IgG = "gray50")) +
  geom_bar(stat='identity',position = "dodge",color='black',linewidth=0.5) +
  geom_text(size=2) +
  ggtitle("FastQ read counts",subtitle= "Unfiltered") +
  theme(plot.background = element_rect(fill="white",color=NA),
        plot.subtitle = element_text(face="italic"),
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA,color='black'),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
        legend.title = element_blank(),
        axis.text.x = element_text(angle=45,hjust=1,vjust=1,size=5),
        axis.ticks.x = element_blank(),
        strip.background = element_blank(),
        axis.title.x = element_blank())

Per-base quality scores

tb_p <- fq_raw$per_base_sequence_quality %>%
  mutate(base = ifelse(read == "R2",-base,base),
         name = gsub("_val_[12]$","",name))

ggplot(tb_p,aes(x=base,y=mean,color=name)) +
  facet_wrap(.~read,nrow=1,scales="free_x") +
  scale_x_continuous(name="Base (bp)",expand=c(0,0),labels=abs) + 
  scale_y_continuous(name="Quality score") +
  geom_line() +
  ggtitle("Per-base quality scores",subtitle= "Unfiltered") +
  theme(plot.background = element_rect(fill="white",color=NA),
        plot.subtitle = element_text(face="italic"),
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA,color='black'),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
        legend.title = element_blank(),
        axis.ticks.x = element_blank(),
        strip.background = element_blank())

Per-base sequence content

tb_p <- fq_raw$per_base_sequence_content %>%
  mutate(name = gsub("_val_[12]$","",name),
         base = ifelse(read == "R2",-base,base)) %>%
  as_tibble %>%
  left_join(select(tb_samps,name,cell_line,epitope,rep),by="name") %>%
  mutate(cond = paste(cell_line,rep)) %>%
  rename(position = base) %>%
  pivot_longer(cols=c(a,c,g,t),names_to = "base",values_to="percent")

ggplot(tb_p,aes(x=position,y=percent,color=cond,linetype=base)) +
  facet_grid(rows=vars(epitope),cols=vars(read),scales="free_x") +
  scale_x_continuous(name="Base",expand=c(0,0),labels=abs) +
  scale_y_continuous(name="Percent",labels = function(x) paste0(x,"%")) +
  geom_line() +
  ggtitle("Per-base sequence content",subtitle="Unfiltered") +
  theme(plot.background = element_rect(fill="white",color=NA),
          plot.subtitle = element_text(face="italic"),
          panel.background = element_blank(),
          panel.border = element_rect(fill = NA,color='black'),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
          legend.title = element_blank(),
          axis.ticks.x = element_blank(),
          strip.background = element_blank())

Per-sequence GC content

tb_p  <- fq_raw$per_sequence_gc_content %>%
  mutate(name = gsub("_val_[12]$","",name),
         read = factor(read)) %>% 
  left_join(select(tb_samps,name,cell_line,epitope,rep),by="name") %>%
  mutate(name = paste(cell_line,rep))

ggplot(tb_p,aes(x=gc_content,y=count,color=name,group=name)) +
  facet_grid(rows=vars(epitope),cols=vars(read),scales="free") +
  scale_x_continuous(name = "GC content",labels=function(x) paste0(x,"%")) +
  scale_y_continuous(name = "Reads",labels=prettyNumbers) +
  geom_line() +
  ggtitle("Per-sequence GC content",subtitle="Unfiltered") +
  theme(plot.background = element_rect(fill="white",color=NA),
            plot.subtitle = element_text(face="italic"),
            panel.background = element_blank(),
            panel.border = element_rect(fill = NA,color='black'),
            panel.grid.major.x = element_blank(),
            panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
            legend.title = element_blank(),
            axis.ticks.x = element_blank(),
            strip.background = element_blank())

Aligned & filtered

tb_fq <- tb_samps %>%
  mutate(fq_flt_hg38 = paste0(qc_dir,"/",name,"_hg38_short_fastqc.zip")) %>%
         #fq_flt_sac3 = paste0(qc_dir,"/",name,"_sac3_short_fastqc.zip")) %>%
  filter(file.exists(fq_flt_hg38)) %>%
  select(name,cell_line,epitope,rep,starts_with("fq"))

fq_hg38 <- clugPac::read_fastqc(tb_fq$fq_flt_hg38)
#fq_sac3 <- clugPac::read_fastqc(tb_fq$fq_flt_sac3)

Flag summary

tb_p  <- fq_hg38$summary %>%
  mutate(name = gsub("_hg38_short$","",name)) %>%
  separate(name,into=c("cell_line","epitope","cond","rep"),sep = "_",remove = FALSE) %>%
  select(-cond) %>%
  mutate(cell_line = factor(cell_line),
         epitope = factor(epitope,levels=c("KLF5","IgG")),
         rep = factor(rep),
         flag = factor(flag,levels=c("pass","warn","fail")),
         title = factor(title,levels=select(.,title) %>% unlist %>% unique %>% sort %>% rev))

ggplot(tb_p,aes(x=rep,y=title,fill=flag)) +
  facet_wrap(.~cell_line,nrow=1) +
  scale_x_discrete(name="Replicates") +
  scale_fill_manual(values=c(pass="lightgreen",
                             warn="orange",
                             fail="firebrick")) +
  geom_tile(color="black") +
  ggtitle("Filtered reads") +
  theme_minimal() +
  theme(axis.title.y = element_blank(),
        legend.title = element_blank(),
        plot.background= element_rect(fill="white",color=NA),
        panel.grid = element_blank())

FastQ Reads

tb_stats <- fq_hg38$basic_statistics %>%
  as_tibble %>%
  mutate(name = gsub("_hg38_short","",name),
         measure = gsub(" ","_",gsub("%","pct_",tolower(measure)))) %>%
  pivot_wider(id_cols = name,
              names_from = measure,
              values_from = value) %>%
  select(name,total_sequences,total_bases,sequence_length,pct_gc) %>%
  mutate(total_sequences = as.double(total_sequences)) %>%
  left_join(by="name",
    select(tb_samps,name,cell_line,epitope,rep)) %>%
  arrange(cell_line,epitope) %>%
  mutate(name = factor(name,levels=select(.,name) %>% unlist %>% unique)) %>%
  select(cell_line,epitope,rep,everything())

tb_stats %>%
  mutate(total_sequences = prettyNumbers(total_sequences)) %>%
  kable %>%
  kable_styling(full_width=FALSE)
cell_line epitope rep name total_sequences total_bases sequence_length pct_gc
OS774 KLF5 1 OS774_KLF5_NONE_1 99.76M 5 Gbp 20-51 42
OS774 KLF5 2 OS774_KLF5_NONE_2 164.25M 8.2 Gbp 20-51 42
OS774 KLF5 3 OS774_KLF5_NONE_3 88.74M 4.4 Gbp 20-51 42
OS774 IgG 1 OS774_IgG_NONE_1 50.36M 2.5 Gbp 20-51 42
OS774 IgG 2 OS774_IgG_NONE_2 62.9M 3.1 Gbp 20-51 41
OS774 IgG 3 OS774_IgG_NONE_3 135.74M 6.8 Gbp 20-51 41
OS833 KLF5 1 OS833_KLF5_NONE_1 67.43M 3.3 Gbp 20-51 41
OS833 KLF5 2 OS833_KLF5_NONE_2 93.77M 4.7 Gbp 20-51 41
OS833 KLF5 3 OS833_KLF5_NONE_3 83.54M 4.2 Gbp 20-51 41
OS833 IgG 1 OS833_IgG_NONE_1 150.03M 7.4 Gbp 20-51 42
OS833 IgG 2 OS833_IgG_NONE_2 120.28M 6 Gbp 20-51 41
OS833 IgG 3 OS833_IgG_NONE_3 73.2M 3.6 Gbp 20-51 41
ggplot(tb_stats,aes(x=name,y=total_sequences,fill=epitope,label=paste0(prettyNumbers(total_sequences,digs = 1),"\n"))) +
  facet_wrap(.~cell_line,scales="free_x",nrow=1) +
  scale_y_continuous(labels = prettyNumbers,name="Reads",expand=expansion(mult=c(0,0.1))) +
  #scale_fill_manual(values=c(KLF5="lightblue",IgG = "gray50")) +
  geom_bar(stat='identity',position = "dodge",color='black',linewidth=0.5) +
  geom_text(size=2) +
  ggtitle("FastQ read counts",subtitle= "Filtered") +
  theme(plot.background = element_rect(fill="white",color=NA),
        plot.subtitle = element_text(face="italic"),
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA,color='black'),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
        legend.title = element_blank(),
        axis.text.x = element_text(angle=45,hjust=1,vjust=1,size=5),
        axis.ticks.x = element_blank(),
        strip.background = element_blank(),
        axis.title.x = element_blank())

Per-base quality scores

tb_p <- fq_hg38$per_base_sequence_quality %>%
  mutate(name = gsub("_hg38_short$","",name))

ggplot(tb_p,aes(x=base,y=mean,color=name)) +
  scale_x_continuous(name="Base (bp)",expand=c(0,0),labels=abs) + 
  scale_y_continuous(name="Quality score") +
  geom_line() +
  ggtitle("Per-base quality scores",subtitle= "Filtered") +
  theme(plot.background = element_rect(fill="white",color=NA),
        plot.subtitle = element_text(face="italic"),
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA,color='black'),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
        legend.title = element_blank(),
        axis.ticks.x = element_blank(),
        strip.background = element_blank())

Per-base sequence content

tb_p <- fq_hg38$per_base_sequence_content %>%
  mutate(name = gsub("_hg38_short$","",name)) %>%
  as_tibble %>%
  left_join(select(tb_samps,name,cell_line,epitope,rep),by="name") %>%
  mutate(cond = paste(cell_line,rep)) %>%
  rename(position = base) %>%
  pivot_longer(cols=c(a,c,g,t),names_to = "base",values_to="percent")

ggplot(tb_p,aes(x=position,y=percent,color=cond,linetype=base)) +
  facet_grid(rows=vars(epitope)) +
  scale_x_continuous(name="Base",expand=c(0,0),labels=abs) +
  scale_y_continuous(name="Percent",labels = function(x) paste0(x,"%")) +
  geom_line() +
  ggtitle("Per-base sequence content",subtitle="Filtered") +
  theme(plot.background = element_rect(fill="white",color=NA),
          plot.subtitle = element_text(face="italic"),
          panel.background = element_blank(),
          panel.border = element_rect(fill = NA,color='black'),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
          legend.title = element_blank(),
          axis.ticks.x = element_blank(),
          strip.background = element_blank())

Per-sequence GC content

tb_p  <- fq_hg38$per_sequence_gc_content %>%
  mutate(name = gsub("_hg38_short$","",name)) %>% 
  left_join(select(tb_samps,name,cell_line,epitope,rep),by="name") %>%
  mutate(name = paste(cell_line,rep))

ggplot(tb_p,aes(x=gc_content,y=count,color=name,group=name)) +
  facet_grid(rows=vars(epitope)) +
  scale_x_continuous(name = "GC content",labels=function(x) paste0(x,"%")) +
  scale_y_continuous(name = "Reads",labels=prettyNumbers) +
  geom_line() +
  ggtitle("Per-sequence GC content",subtitle="Filtered") +
  theme(plot.background = element_rect(fill="white",color=NA),
            plot.subtitle = element_text(face="italic"),
            panel.background = element_blank(),
            panel.border = element_rect(fill = NA,color='black'),
            panel.grid.major.x = element_blank(),
            panel.grid.major.y = element_line(linewidth=0.5,color="gray80"),
            legend.title = element_blank(),
            axis.ticks.x = element_blank(),
            strip.background = element_blank())

Alignment stats

Alignment percentages are calculated as reads successfully mapped to hg38 or sac3 genomes / total reads.

When calculating scores between samples, the ratio of Sac3 to hg38 reads can be used to normalize: we assume all samples had an equivalent number of Sac3 reads spiked, and thus any missing Sac3 reads indicate missing hg38 reads as well. For this reason, we can scale down reads of all samples to match the sample with the lowest ratio. In other words, we assume the lowest ratio sample lost reads and so we down-sample all others to match it.

For instance, if SampleA has a ratio of 0.02 and SampleB has a ratio of 0.01, each ratio is divided into 0.01 to give factors of 0.5 (SampleA) and 1 (SampleB). A peak with 100 reads in SampleA and 50 reads in SampleB would be multiplied by their respective factors to both be 50.

fls_spike <- tb_samps %>%
  filter(file.exists(spike_tsv)) %>%
  vectify(spike_tsv,name)

type_levs=c(ambiguous="gray",unmapped="pink",sac3="purple",hg38="darkgreen")

tb_p <- lapply(names(fls_spike), function(nm) {
    fread(fls_spike[nm],col.names=c("type","reads")) %>%
      mutate(type = case_when(grepl("_hg38_",type) ~ "hg38",
                              grepl("_sac3_",type) ~ "sac3",
                              TRUE ~ type),
             name = nm)
  }) %>% do.call(rbind,.) %>%
  group_by(name) %>%
  mutate(type = factor(type,levels=names(type_levs))) %>%
  ungroup %>%
  left_join(by="name",
    select(tb_samps,name,cell_line,epitope,rep)) %>%
  mutate(cond = paste(cell_line,rep),
         epitope = factor(epitope,levels=c("KLF5","IgG")))

tb_labs <- tb_p %>%
  mutate(stat = ifelse(type %in% c("hg38","sac3"),"aligned","unaligned")) %>%
  group_by(cond,name,epitope,stat) %>%
  summarize(total = sum(reads),.groups='drop_last') %>%
  mutate(frac = total / sum(total)) %>%
  ungroup %>%
  filter(stat=='aligned')

p1 <- ggplot(tb_p,aes(x=cond,y=reads,fill=type)) +
  facet_wrap(.~epitope,nrow=1,scales="free_x") +
  scale_y_continuous(name = "Reads",labels=prettyNumbers,expand=expansion(mult=c(0,0.1))) +
  scale_fill_manual(values=type_levs) +
  geom_bar(stat='identity') +
  geom_text(data=tb_labs,mapping=aes(x=cond,y=total,label=paste0("\n",percent(frac,accuracy=1))),
            color="white",inherit.aes=FALSE) +
  ggtitle("Read alignment",subtitle="Alignment percentage calculated from hg38 + sac3 reads.") +
  theme(plot.background = element_rect(fill="white",color=NA),
        plot.subtitle = element_text(face="italic"),
        panel.background = element_blank(),
        panel.border = element_rect(fill=NA,color="black"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color='gray75'),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=90,hjust=1,vjust=0.5),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.title = element_blank())

tb_p2 <- tb_p %>%
  filter(type %in% c("sac3","hg38")) %>%
  group_by(name) %>%
  mutate(ratio = reads / sum(reads)) %>%
  filter(type == "sac3")
p2 <- ggplot(tb_p2,aes(x=cond,y=ratio,label=paste0("\n",percent(ratio,accuracy=0.01)))) +
  facet_wrap(.~epitope,nrow=1,scales="free_x") +
  scale_y_reverse(name= "Sac3/hg38 Ratio",labels=percent,expand=expansion(mult=c(0.1,0))) +
  geom_bar(stat='identity',fill='purple',color="black",linewidth=0.25) +
  geom_text() +
  theme(plot.background = element_rect(fill="white",color=NA),
        plot.subtitle = element_text(face="italic"),
        panel.background = element_blank(),
        panel.border = element_rect(fill=NA,color="black"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color='gray75'),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.title = element_blank())

plot_grid(p1,p2,ncol=1,align='v',axis='lr',rel_heights = c(5,3))

Peak calling

For individual samples, peaks are called using the genome-wide median as a background.

FRiP

fls_frip <- tb_samps %>%
  filter(file.exists(frip)) %>%
  vectify(frip,name)

tb_p <- lapply(names(fls_frip),function(nm) {
  fread(fls_frip[nm]) %>%
    as_tibble %>%
    mutate(name = nm)
}) %>% do.call(rbind,.) %>%
  mutate(cell_line = str_extract(name,"^[:alnum:]+"),
         epitope = str_match(name,"^[:alnum:]+_([:alnum:]+)")[,2])

tb_lab <- tibble(x=0.5,y=0.3,label="Minimum FRiP\n(Cut&Run)",cell_line="OS774")

ggplot(tb_p,aes(x=name,y=frip,fill=epitope)) +
  facet_wrap(.~cell_line,scales='free_x',nrow=1,strip.position='top') +
  scale_y_continuous(name="FRiP",labels=percent,limits=c(0,1),expand=c(0,0)) +
  geom_bar(stat='identity',color='black',linewidth=0.25) +
  geom_hline(yintercept = 0.3,color='red',linetype='dashed') +
  geom_text(data=tb_lab,mapping=aes(x=x,y=y,label=label),
            color="red",hjust=0,
            inherit.aes=FALSE) +
  ggtitle("Fraction of Reads in Peaks") +
  theme(plot.background = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(fill=NA,color="black",linewidth = 0.5),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color='gray85'),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=45,hjust=1,vjust=1),
        axis.title.x = element_blank())
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

Peak calling details

p_fls <- tb_samps %>%
  filter(file.exists(nPeaks)) %>%
  vectify(nPeaks,name)

tb_pks <- lapply(names(p_fls),function(nm){
  ascCutNRun::read_peak_file(p_fls[nm]) %>%
    mutate(name = nm)
}) %>% do.call(rbind,.) %>%
  left_join(by='name',
    select(tb_samps,name,cell_line,epitope,rep)) %>%
  mutate(epitope = factor(epitope,levels=c("KLF5","IgG"))) %>%
  arrange(epitope) %>%
  mutate(name = factor(name,levels=select(.,name) %>% unlist %>% unique),
         width = end - start)

tb_p <- tb_pks %>%
  group_by(name,cell_line,epitope,rep) %>%
  tally(name = "count") %>%
  ungroup

p_c <- ggplot(tb_p,aes(x=name,y=count,fill=epitope)) +
  facet_wrap(.~cell_line,nrow=1,scales="free_x") +
  scale_y_continuous(expand=expansion(mult=c(0,0.1)),labels=prettyNumbers,name="Peaks") +
  #scale_fill_manual(values=c(KLF5="lightblue",IgG="gray")) +
  geom_bar(stat="identity",color='black',linewidth=0.5) +
  theme(plot.background = element_rect(fill="white",color=NA),
        panel.background = element_blank(),
        panel.border = element_rect(fill=NA,color="black"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color='gray85'),
        strip.background = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.title = element_blank(),
        legend.position = 'none')

p_q <- ggplot(tb_pks,aes(x=name,y=nlog10q,fill=epitope)) +
  facet_wrap(.~cell_line,ncol=2,scales="free_x") +
  scale_y_log10(name="-log10(Q)",labels = prettyNumbers) +
  #scale_fill_manual(values=c(KLF5="lightblue",IgG="gray")) +
  geom_violin(draw_quantiles = 0.5,scale = "width") +
  theme(plot.background = element_rect(fill="white",color=NA),
        panel.background = element_blank(),
        panel.border = element_rect(fill=NA,color="black"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color='gray85'),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.title = element_blank())

p_w <- ggplot(tb_pks,aes(x=name,y=width,fill=epitope)) +
  facet_wrap(.~cell_line,ncol=2,scales="free_x") +
  scale_y_log10(name="Width",labels = prettyBP) +
  #scale_fill_manual(values=c(KLF5="lightblue",IgG="gray")) +
  geom_violin(draw_quantiles = 0.5,scale = "width") +
  theme(plot.background = element_rect(fill="white",color=NA),
        panel.background = element_blank(),
        panel.border = element_rect(fill=NA,color="black"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linewidth=0.5,color='gray85'),
        strip.background = element_blank(),
        strip.text = element_blank(),
        axis.text.x = element_text(angle=45,hjust=1,vjust=1),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.title = element_blank(),
        legend.position = 'none')

cowplot::plot_grid(p_c,p_q,p_w,ncol=1,align='v',axis='lr',rel_heights = c(2,2,3))

Regional plots

tb_fls <- select(tb_pools,name,cell_line,epitope,nPeaks,pileup)

tb_npks <- vectify(tb_fls,nPeaks,name)

gr_npks <- lapply(names(tb_npks), function(nm) {
  read_peak_file(tb_npks[[nm]]) %>% 
    makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
    reduce %>%
    as_tibble %>%
    mutate(name = nm)
  }) %>% do.call(rbind,.) %>%
  mutate(seqnames = factor(seqnames,levels=paste0("chr",c(1:22,"X","Y")))) %>%
  filter(!is.na(seqnames)) %>%
  left_join(select(tb_fls,name,cell_line,epitope),by='name') %>%
  makeGRangesFromDataFrame(keep.extra.columns = TRUE)

# gr_window <- tb_npks %>%
#   filter(epitope == "KLF5") %>%
#   arrange(desc(nlog10q)) %>%
#   filter(row_number() == 1) %>%
#   makeGRangesFromDataFrame %>%
#   resize(width=width(.)*10,fix='end')

plot_region <- function(gr_window){
  tb_bdg  <- tb_fls %>%
    vectify(pileup,name) %>%
    scan_bdg(gr_regions = gr_window) %>%
    left_join(select(tb_fls,name,cell_line,epitope),by="name")
  
  tb_scales <- tb_bdg %>%
    group_by(cell_line) %>%
    mutate(ymax = max(score)) %>%
    ungroup %>%
    select(name,ymax) %>%
    distinct
  
  tb_pks <- subset_and_trim_gr(gr_npks,gr_window) %>% 
    as_tibble
    
  x_rng <- c(start(gr_window),end(gr_window))
  p <- ggplot(tb_bdg,aes(xmin=start,xmax=end,ymin=0,ymax=score,fill=epitope)) +
    facet_wrap(.~name,scales="free_y",strip.position = "right",ncol=1) +
    scale_x_continuous(name= grange_desc(gr_window),expand=c(0,0),limits=x_rng,labels=prettyBP) +
    scale_y_continuous(name= "Pileup",expand=expansion(mult=c(0,0.1)),labels=function(x) ifelse(x>0,prettyNumbers(x),""),n.breaks = 3) +
    #scale_fill_manual(values=c(KLF5="lightblue",IgG="grey")) +
    geom_rect(data=tb_pks,mapping=aes(xmin=start,xmax=end,ymin=0,ymax=Inf,fill = epitope),alpha=0.2,color='black',linewidth=0.1,linetype='dashed') +
    geom_rect(data=tb_scales,mapping=aes(ymax=ymax),xmin=-Inf,xmax=Inf,ymin=0,fill=NA,color=NA,inherit.aes=FALSE) +
    geom_rect() +
    geom_hline(yintercept=0,color='black',linewidth=0.5) +
    geom_vline(xintercept=x_rng[1],color='black',linewidth=0.5) +
    theme(plot.background = element_rect(fill="white",color=NA),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid = element_blank(),
          panel.spacing.y = unit(0,"lines"),
          strip.text.y.right = element_text(angle=0),
          strip.background = element_blank(),
          legend.position = "none")
  
  p_gn <- plot_gene_track(gr_window,gene_cache_file = gn_tsv) + 
    theme(legend.position = 'none',
          panel.grid = element_blank())
  
  cowplot::plot_grid(p_gn,p,ncol=1,align="v",axis='lr',rel_heights=c(1,10))
}

BCL2

BRD4

CCND1

CEACAM5

MMP2

MMP9

TP63

VEGFA